import pandas as pd
import numpy as np

# Inspect the basic structure of an Excel file
def analyze_excel_structure(file_path):
    """Load an Excel file and print an overview of its structure.

    The file is read with ``pd.read_excel`` using its default arguments.
    On success a report (shape, column names, dtypes, first rows, summary
    statistics, missing-value counts and dtype distribution) is printed to
    stdout.

    Args:
        file_path: Path (or any other input accepted by ``pd.read_excel``)
            of the workbook to analyze.

    Returns:
        The loaded DataFrame, or ``None`` if the file could not be read.
    """
    print(f"正在分析文件: {file_path}")

    # Only the read itself is guarded, so that the "error while reading"
    # message is never printed for a problem that happened afterwards.
    # The catch stays broad on purpose: reading can fail in many ways
    # (missing file, missing Excel engine, corrupt workbook, ...), and this
    # function reports the failure and returns None instead of raising.
    try:
        df = pd.read_excel(file_path)
    except Exception as e:
        print(f"读取文件时出错: {e}")
        return None

    _print_structure_report(df)

    # Hand the DataFrame back for further processing by the caller.
    return df


def _print_structure_report(df):
    """Print shape, columns, dtypes, head, statistics and missing counts of *df*."""
    row_count, column_count = df.shape

    print("\n文件加载成功！")
    print(f"数据形状: {df.shape}")
    print(f"行数: {row_count}")
    print(f"列数: {column_count}")

    print("\n列名:")
    for position, column_name in enumerate(df.columns, 1):
        print(f"{position}. {column_name}")

    print("\n数据类型:")
    print(df.dtypes)

    print("\n前5行数据:")
    print(df.head())

    print("\n基本统计信息:")
    # DataFrame.describe() raises ValueError for a frame without columns
    # (e.g. an empty sheet), so that case is reported instead of attempted.
    if column_count:
        print(df.describe())
    else:
        print("（无可统计的列）")

    print("\n缺失值统计:")
    missing_values = df.isnull().sum()
    # Show only the columns that actually contain missing values.
    print(missing_values[missing_values > 0])

    print("\n数据类型分布:")
    print(df.dtypes.value_counts())

if __name__ == "__main__":
    # Relative path of the listed-company revenue workbook.
    workbook_path = "../chapter5/上市公司营收数据.xlsx"

    # Run the structure analysis; a None result signals that it failed.
    analysis_succeeded = analyze_excel_structure(workbook_path) is not None

    if analysis_succeeded:
        print(f"\n分析完成！")